******************************
*** CREATE CLICKS DATASETS ***
******************************

*** MAIN DATASET ***
* Builds the click datasets for the main sample of refugee articles:
*   articlesfinal.dta      article list merged with the click data
*   num_articles.dta       number of articles per publication month
*   final_click.dta        means by municipality and publication month
*   final_click_total.dta  sums by municipality and publication month

import excel "List_articles.xlsx", firstrow sheet("Main sample refugee articles") clear
sort article

/* dataset click available after signing disclosure agreement with DN */
merge 1:m article using "tot_click.dta"
save "articlesfinal.dta", replace


* tot clicks by municipality (sum over city)
* the monthly totals tot_views are part of by() so that they survive the collapse
collapse (sum) views, by(article county municipality section subsection date ///
tot_views201501 tot_views201502 tot_views201503 tot_views201504 tot_views201505 ///
tot_views201506 tot_views201507 tot_views201508 tot_views201509 tot_views201510 ///
tot_views201511 tot_views201512 tot_views201601 tot_views201602)


* create month of publication
* date is read as a number of the form YYYYMMDD; a month is assigned only for
* January 2015 to February 2016 with a day between 01 and 31, every other
* value of date leaves pubmonth missing (same result as looping over all
* month and day combinations)
gen pubmonth = floor(date/100) if date == floor(date) ///
    & inrange(mod(date, 100), 1, 31) ///
    & (inrange(floor(date/100), 201501, 201512) | inrange(floor(date/100), 201601, 201602))

* number of articles per month
sort article section subsection
egen id=group(article)
egen ids=group(section)
egen idss=group(subsection)
preserve
* idm only gives the collapse a variable to aggregate; the result of interest
* is one row per (id ids idss pubmonth) combination
egen idm=group(municipality)
collapse idm, by(id ids idss pubmonth)
bysort pubmonth: egen numarticle=count(id)
drop id*
collapse numarticle*, by(pubmonth)
save "num_articles.dta", replace
restore

merge m:1 pubmonth using "num_articles.dta"
drop _merge

* create dummy for sweden and neighbouring countries
gen destino=1 if subsection=="sverige"
* denmark, norway and finland are not in the by() list of the collapse above,
* so they are no longer in memory at this point and an unguarded replace stops
* the do-file with r(111); the recode is applied only when all three exist.
* destino is not among the variables kept by the collapses below.
capture confirm variable denmark norway finland
if _rc == 0 {
    replace destino=0 if denmark==1 | norway==1 | finland==1
}
else {
    display as text "note: denmark, norway, finland not in memory; destino=0 not assigned"
}

* normalize views by tot clicks per month
gen norviews=.
label variable norviews "views over total clicks per municip and month"

* divide by the monthly total that belongs to the publication month
foreach m of numlist 201501/201512 201601/201602 {
    replace norviews=views/tot_views`m' if pubmonth==`m'
}

* change format
recast double norviews*
format norviews* %10.9f

* replace to zero when no clicks
replace views=0 if views==.
replace norviews=0 if norviews==. & municipality!="" & pubmonth!=.

* mean clicks per municip and month
preserve
collapse views* norviews* tot_views* numarticle*, by(municipality pubmonth)

sort municipality
save "final_click.dta", replace
restore

* total clicks per municip and month
collapse (sum) views* norviews* tot_views* numarticle*, by(municipality pubmonth)

sort municipality
save "final_click_total.dta", replace


*** CLASSIFIED DATASET ***
* Builds the click datasets for the classified articles, split by flyktingar:
*   classifiedarticlesfinal.dta      article list merged with the click data
*   num_articles_classified.dta      number of articles per month and class
*   final_click_classified.dta       means by municipality, month and class
*   final_click_totalclassified.dta  sums by municipality, month and class

import excel "List_articles.xlsx", firstrow sheet("Classified articles") clear
sort article

/* dataset click available after signing disclosure agreement with DN */
merge 1:m article using "tot_click.dta"
save "classifiedarticlesfinal.dta", replace

* tot clicks by municipality (sum over city)
* the monthly totals tot_views are part of by() so that they survive the collapse
collapse (sum) views, by(article county municipality section subsection date flyktingar ///
tot_views201501 tot_views201502 tot_views201503 tot_views201504 tot_views201505 ///
tot_views201506 tot_views201507 tot_views201508 tot_views201509 tot_views201510 ///
tot_views201511 tot_views201512 tot_views201601 tot_views201602)


* create month of publication
* date is read as a number of the form YYYYMMDD; a month is assigned only for
* January 2015 to February 2016 with a day between 01 and 31, every other
* value of date leaves pubmonth missing (same result as looping over all
* month and day combinations)
gen pubmonth = floor(date/100) if date == floor(date) ///
    & inrange(mod(date, 100), 1, 31) ///
    & (inrange(floor(date/100), 201501, 201512) | inrange(floor(date/100), 201601, 201602))

* number of articles per month
sort article flyktingar
egen id=group(article)
egen idf=group(flyktingar)
preserve
* idm only gives the collapse a variable to aggregate; the result of interest
* is one row per (id idf pubmonth) combination
egen idm=group(municipality)
collapse idm, by(id idf pubmonth)
* idf numbers the values of flyktingar in sorted order: 2 is the second value,
* 1 the first (presumably refugee-classified vs other; confirm against the sheet)
bysort pubmonth: egen numarticle_flyk=count(id) if idf==2
bysort pubmonth: egen numarticle_other=count(id) if idf==1
drop id*
collapse numarticle*, by(pubmonth)
save "num_articles_classified.dta", replace
restore

merge m:1 pubmonth using "num_articles_classified.dta"
drop _merge

* normalize views by tot clicks per month
gen norviews=.
label variable norviews "views over total clicks per municip and month"

* divide by the monthly total that belongs to the publication month
foreach m of numlist 201501/201512 201601/201602 {
    replace norviews=views/tot_views`m' if pubmonth==`m'
}

* change format
recast double norviews*
format norviews* %10.9f

* mean clicks per municip and month and classified flyktingar articles
preserve
collapse views* norviews* tot_views* numarticle*, by(municipality pubmonth flyktingar)

* replace to zero when no clicks
replace norviews=0 if norviews==.
replace views=0 if views==.

* ratio of emphatic articles
* within each municipality and month: views of the current row over the views
* of the current plus the next row; the last row of a group has no next row,
* so its ratio is missing
sort municipality pubmonth flyktingar
by municipality pubmonth: gen ratio_emp=views[_n]/(views[_n]+views[_n+1])

save "final_click_classified.dta", replace
restore

* total clicks per municip and month and classified flyktingar articles
* (sum) is required here: without it collapse takes means and this file would
* repeat the means already saved in final_click_classified.dta
collapse (sum) views* norviews* tot_views* numarticle*, by(municipality pubmonth flyktingar)

* replace to zero when no clicks
replace norviews=0 if norviews==.
replace views=0 if views==.

* order rows by municipality, month and class before saving
sort municipality pubmonth flyktingar

save "final_click_totalclassified.dta", replace


*** ALTERNATIVE CLASSIFIED DATASET 1 ***
* Main sample restricted to the subsections sverige and varlden; saves means
* by municipality, publication month and the sverige dummy in
* final_click_classified1.dta

import excel "List_articles.xlsx", firstrow sheet("Main sample refugee articles") clear
sort article

/* dataset click available after signing disclosure agreement with DN */
merge 1:m article using "tot_click.dta"
save "articlesfinal.dta", replace

* tot clicks by municipality (sum over city)
* the monthly totals tot_views are part of by() so that they survive the collapse
collapse (sum) views, by(article county municipality section subsection date ///
tot_views201501 tot_views201502 tot_views201503 tot_views201504 tot_views201505 ///
tot_views201506 tot_views201507 tot_views201508 tot_views201509 tot_views201510 ///
tot_views201511 tot_views201512 tot_views201601 tot_views201602)


* create month of publication
* date is read as a number of the form YYYYMMDD; a month is assigned only for
* January 2015 to February 2016 with a day between 01 and 31, every other
* value of date leaves pubmonth missing (same result as looping over all
* month and day combinations)
gen pubmonth = floor(date/100) if date == floor(date) ///
    & inrange(mod(date, 100), 1, 31) ///
    & (inrange(floor(date/100), 201501, 201512) | inrange(floor(date/100), 201601, 201602))

* select only sweden and world
keep if subsection=="sverige" | subsection=="varlden"

* 1 for sverige, 0 for the only other subsection left (varlden)
gen sverige = (subsection=="sverige")


* normalize views by tot clicks per month
gen norviews=.
label variable norviews "views over total clicks per municip and month"

* divide by the monthly total that belongs to the publication month
foreach m of numlist 201501/201512 201601/201602 {
    replace norviews=views/tot_views`m' if pubmonth==`m'
}

* change format
recast double norviews*
format norviews* %10.9f

* replace to zero when no clicks
replace views=0 if views==.
replace norviews=0 if norviews==. & municipality!="" & pubmonth!=.

* mean clicks per municip, month and classified sverige world
* numarticle, clickarticle and ratioarticle are not created anywhere in this
* section, and naming a variable that does not exist stops collapse with
* r(111); only the requested variables that are in memory are collapsed
local meanvars
foreach pat in norviews* tot_views* numarticle* clickarticle ratioarticle {
    capture unab hit : `pat'
    if _rc == 0 {
        local meanvars `meanvars' `hit'
    }
    else {
        display as text "note: no variable matches `pat'; left out of the collapse"
    }
}
collapse `meanvars', by(municipality pubmonth sverige)

sort municipality
save "final_click_classified1.dta", replace


*** ALTERNATIVE CLASSIFIED DATASET 2 ***
* Classified articles split by wordrefugee; saves means by municipality,
* publication month and wordrefugee in final_click_classified2.dta

* classification of articles
* sheet name quoted like every other import in this file
import excel "List_articles.xlsx", firstrow sheet("Classified articles") clear
sort article

/* dataset click available after signing disclosure agreement with DN */
merge 1:m article using "tot_click.dta"
save "classified2articlesfinal.dta", replace

* tot clicks by municipality (sum over city)
* the monthly totals tot_views are part of by() so that they survive the collapse
collapse (sum) views, by(article county municipality section subsection date wordrefugee ///
tot_views201501 tot_views201502 tot_views201503 tot_views201504 tot_views201505 ///
tot_views201506 tot_views201507 tot_views201508 tot_views201509 tot_views201510 ///
tot_views201511 tot_views201512 tot_views201601 tot_views201602)


* create month of publication
* date is read as a number of the form YYYYMMDD; a month is assigned only for
* January 2015 to February 2016 with a day between 01 and 31, every other
* value of date leaves pubmonth missing (same result as looping over all
* month and day combinations)
gen pubmonth = floor(date/100) if date == floor(date) ///
    & inrange(mod(date, 100), 1, 31) ///
    & (inrange(floor(date/100), 201501, 201512) | inrange(floor(date/100), 201601, 201602))

* normalize views by tot clicks per month
gen norviews=.
label variable norviews "views over total clicks per municip and month"

* divide by the monthly total that belongs to the publication month
foreach m of numlist 201501/201512 201601/201602 {
    replace norviews=views/tot_views`m' if pubmonth==`m'
}

* change format
recast double norviews*
format norviews* %10.9f

* mean clicks per municip and month and classified wordrefugee articles
* numarticle is not created anywhere in this section, and a wildcard that
* matches no variable stops collapse with r(111); only the requested
* variables that are in memory are collapsed
local meanvars
foreach pat in views* norviews* tot_views* numarticle* {
    capture unab hit : `pat'
    if _rc == 0 {
        local meanvars `meanvars' `hit'
    }
    else {
        display as text "note: no variable matches `pat'; left out of the collapse"
    }
}
collapse `meanvars', by(municipality pubmonth wordrefugee)

* replace to zero when no clicks
replace norviews=0 if norviews==.
replace views=0 if views==.

save "final_click_classified2.dta", replace


*** OTHER REFUGEE ARTICLES DATASET ***
* Refugee articles from other sections; saves means by municipality and
* publication month in final_click_other.dta

* classification of articles
import excel "List_articles.xlsx", firstrow sheet("Other sections refugee articles") clear
sort article

/* dataset click available after signing disclosure agreement with DN */
merge 1:m article using "tot_click.dta"
save "otherarticlesfinal.dta", replace

* tot clicks by municipality (sum over city)
* the monthly totals tot_views are part of by() so that they survive the collapse
collapse (sum) views, by(article county municipality section subsection date ///
tot_views201501 tot_views201502 tot_views201503 tot_views201504 tot_views201505 ///
tot_views201506 tot_views201507 tot_views201508 tot_views201509 tot_views201510 ///
tot_views201511 tot_views201512 tot_views201601 tot_views201602)


* create month of publication
* date is read as a number of the form YYYYMMDD; a month is assigned only for
* January 2015 to February 2016 with a day between 01 and 31, every other
* value of date leaves pubmonth missing (same result as looping over all
* month and day combinations)
gen pubmonth = floor(date/100) if date == floor(date) ///
    & inrange(mod(date, 100), 1, 31) ///
    & (inrange(floor(date/100), 201501, 201512) | inrange(floor(date/100), 201601, 201602))

* normalize views by tot clicks per month
gen norviews=.
label variable norviews "views over total clicks per municip and month"

* divide by the monthly total that belongs to the publication month
foreach m of numlist 201501/201512 201601/201602 {
    replace norviews=views/tot_views`m' if pubmonth==`m'
}

* change format
recast double norviews*
format norviews* %10.9f

* replace to zero when no clicks
replace views=0 if views==.
replace norviews=0 if norviews==. & municipality!="" & pubmonth!=.

* mean clicks per municip and month
* numarticle is not created anywhere in this section, and a wildcard that
* matches no variable stops collapse with r(111); only the requested
* variables that are in memory are collapsed
local meanvars
foreach pat in views* norviews* tot_views* numarticle* {
    capture unab hit : `pat'
    if _rc == 0 {
        local meanvars `meanvars' `hit'
    }
    else {
        display as text "note: no variable matches `pat'; left out of the collapse"
    }
}
collapse `meanvars', by(municipality pubmonth)

sort municipality
save "final_click_other.dta", replace


*** MAJOR 2015 EVENTS ARTICLES DATASET ***
* Articles on major 2015 events; saves means by municipality and publication
* month in final_click_events.dta

* classification of articles
import excel "List_articles.xlsx", firstrow sheet("Major 2015 events articles") clear
sort article

/* dataset click available after signing disclosure agreement with DN */
merge 1:m article using "tot_click.dta"
save "eventsarticlesfinal.dta", replace

* tot clicks by municipality (sum over city)
* the monthly totals tot_views are part of by() so that they survive the collapse
collapse (sum) views, by(article county municipality section subsection date ///
tot_views201501 tot_views201502 tot_views201503 tot_views201504 tot_views201505 ///
tot_views201506 tot_views201507 tot_views201508 tot_views201509 tot_views201510 ///
tot_views201511 tot_views201512 tot_views201601 tot_views201602)


* create month of publication
* date is read as a number of the form YYYYMMDD; a month is assigned only for
* January 2015 to February 2016 with a day between 01 and 31, every other
* value of date leaves pubmonth missing (same result as looping over all
* month and day combinations)
gen pubmonth = floor(date/100) if date == floor(date) ///
    & inrange(mod(date, 100), 1, 31) ///
    & (inrange(floor(date/100), 201501, 201512) | inrange(floor(date/100), 201601, 201602))

* normalize views by tot clicks per month
gen norviews=.
label variable norviews "views over total clicks per municip and month"

* divide by the monthly total that belongs to the publication month
foreach m of numlist 201501/201512 201601/201602 {
    replace norviews=views/tot_views`m' if pubmonth==`m'
}

* change format
recast double norviews*
format norviews* %10.9f

* replace to zero when no clicks
replace views=0 if views==.
replace norviews=0 if norviews==. & municipality!="" & pubmonth!=.

* mean clicks per municip and month
* numarticle is not created anywhere in this section, and a wildcard that
* matches no variable stops collapse with r(111); only the requested
* variables that are in memory are collapsed
local meanvars
foreach pat in views* norviews* tot_views* numarticle* {
    capture unab hit : `pat'
    if _rc == 0 {
        local meanvars `meanvars' `hit'
    }
    else {
        display as text "note: no variable matches `pat'; left out of the collapse"
    }
}
collapse `meanvars', by(municipality pubmonth)

sort municipality
save "final_click_events.dta", replace


*** ACCIDENTS ARTICLES DATASET ***
* Articles on accidents; saves means by municipality and publication month in
* final_click_accidents.dta

* classification of articles
import excel "List_articles.xlsx", firstrow sheet("Accidents articles") clear
sort article

/* dataset click available after signing disclosure agreement with DN */
merge 1:m article using "tot_click.dta"
save "accidentarticlesfinal.dta", replace

* tot clicks by municipality (sum over city)
* the monthly totals tot_views are part of by() so that they survive the collapse
collapse (sum) views, by(article county municipality section subsection date ///
tot_views201501 tot_views201502 tot_views201503 tot_views201504 tot_views201505 ///
tot_views201506 tot_views201507 tot_views201508 tot_views201509 tot_views201510 ///
tot_views201511 tot_views201512 tot_views201601 tot_views201602)


* create month of publication
* date is read as a number of the form YYYYMMDD; a month is assigned only for
* January 2015 to February 2016 with a day between 01 and 31, every other
* value of date leaves pubmonth missing (same result as looping over all
* month and day combinations)
gen pubmonth = floor(date/100) if date == floor(date) ///
    & inrange(mod(date, 100), 1, 31) ///
    & (inrange(floor(date/100), 201501, 201512) | inrange(floor(date/100), 201601, 201602))

* normalize views by tot clicks per month
gen norviews=.
label variable norviews "views over total clicks per municip and month"

* divide by the monthly total that belongs to the publication month
foreach m of numlist 201501/201512 201601/201602 {
    replace norviews=views/tot_views`m' if pubmonth==`m'
}

* change format
recast double norviews*
format norviews* %10.9f

* replace to zero when no clicks
replace views=0 if views==.
replace norviews=0 if norviews==. & municipality!="" & pubmonth!=.

* mean clicks per municip and month
* numarticle is not created anywhere in this section, and a wildcard that
* matches no variable stops collapse with r(111); only the requested
* variables that are in memory are collapsed
local meanvars
foreach pat in views* norviews* tot_views* numarticle* {
    capture unab hit : `pat'
    if _rc == 0 {
        local meanvars `meanvars' `hit'
    }
    else {
        display as text "note: no variable matches `pat'; left out of the collapse"
    }
}
collapse `meanvars', by(municipality pubmonth)

sort municipality
save "final_click_accidents.dta", replace
